import os
from collections import namedtuple
import glob
# import pandas as pd
# from gooey import Gooey, GooeyParser
import pikepdf
import fitz
import openpyxl


def get_toc(file):
    """Return the table of contents (bookmarks) of a PDF.

    Args:
        file: Path of the document to open with ``fitz.open``.

    Returns:
        Whatever ``Document.get_toc()`` yields for the document; callers in
        this file read index 0, 1 and 2 of each entry as level, title and
        page number.
    """
    # Close the document deterministically: leaving it open leaks a file
    # handle per PDF when many files are processed in one run.
    with fitz.open(file) as doc:
        return doc.get_toc()


def toc_to_excel(toc, xlsx_file):
    """Write bookmark entries to a new workbook and save it.

    Each entry of ``toc`` occupies one row (first entry in row 1). The
    entry's title is written to the column whose number equals the entry's
    indent level (item 0), and its page number (item 2) goes in the column
    immediately to the right of the title (item 1).

    Args:
        toc: Iterable of indexable entries read as (indent, title, page).
        xlsx_file: Destination path handed to ``Workbook.save``.
    """
    workbook = openpyxl.Workbook()
    worksheet = workbook.active
    worksheet.title = "bookmark"

    for row_number, entry in enumerate(toc, start=1):
        # Index access (not unpacking) so entries with extra items still work.
        level = entry[0]
        worksheet.cell(row=row_number, column=level).value = entry[1]
        worksheet.cell(row=row_number, column=level + 1).value = entry[2]

    workbook.save(xlsx_file)


def extract_bookmarks(pdf_folder, excel_folder):
    """Export the bookmarks of every PDF under a folder to Excel files.

    The folder is searched recursively. For each PDF found, a workbook is
    written under ``excel_folder`` at the same relative location, with the
    file extension replaced by ``.xlsx``. Missing output directories are
    created. One ``source --> target`` line is printed per converted file.

    Args:
        pdf_folder: Root folder that is searched for ``*.pdf`` files.
        excel_folder: Root folder that receives the generated workbooks.
    """
    # Build the pattern with os.path.join so it is not tied to the Windows
    # separator, and escape the root so characters such as '[' or '*' in the
    # folder name are matched literally instead of acting as wildcards.
    pattern = os.path.join(glob.escape(pdf_folder), '**', '*.pdf')

    for file in glob.glob(pattern, recursive=True):
        # The pattern also matches directories whose name ends in ".pdf".
        if not os.path.isfile(file):
            continue

        # Map the path by structure rather than by substring replacement:
        # str.replace would rewrite *every* occurrence of the folder name or
        # of ".pdf" inside the path, and would miss an upper-case ".PDF".
        relative_path = os.path.relpath(file, pdf_folder)
        stem, _extension = os.path.splitext(relative_path)
        excel_file = os.path.join(excel_folder, stem + '.xlsx')

        parent_folder = os.path.dirname(excel_file)
        if parent_folder:  # os.makedirs('') raises FileNotFoundError
            os.makedirs(parent_folder, exist_ok=True)

        toc = get_toc(file)
        toc_to_excel(toc, excel_file)
        print(file, '-->', excel_file)


if __name__ == '__main__':
    # Script entry point: convert every PDF under the hard-coded input
    # folder into a bookmark workbook under the hard-coded output folder.
    extract_bookmarks(
        pdf_folder=r'D:\dl\数据样本\输入文件',
        excel_folder=r'D:\dl\bkmk',
    )
